In [1]:
    
from bs4 import BeautifulSoup
import requests
import js2py
import pandas as pd
from itertools import permutations
    
In [2]:
    
# Download the "compare by passport" page; the visa data is embedded in one of
# its <script> tags and is extracted in the next cell.
# requests applies no timeout by default, so set one to avoid hanging the
# notebook forever, and fail loudly on an HTTP error instead of parsing an
# error page.
response = requests.get('https://www.passportindex.org/comparebyPassport.php', timeout=30)
response.raise_for_status()
source = response.content.decode('utf-8')
    
In [3]:
    
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed.
soup = BeautifulSoup(source, 'html.parser')

# Find the inline <script> that defines `com_c_vf` and wrap it in a function
# that returns four lists of [passport_key, value] pairs, all keyed by
# Object.keys(com_c_vf):
#   [0] com_c_vf, [1] com_c_voa, [2] com_c_eta, [3] so_vf
# If several scripts match, the last one wins.
script = None
for s in soup.find_all('script'):
    if 'com_c_vf' in s.text:
        script = 'function a() {' \
            + s.text + \
            '; return [\
                Object.keys(com_c_vf).map(function(z){return [z, com_c_vf[z]]}),\
                Object.keys(com_c_vf).map(function(z){return [z, com_c_voa[z]]}),\
                Object.keys(com_c_vf).map(function(z){return [z, com_c_eta[z]]}),\
                Object.keys(com_c_vf).map(function(z){return [z, so_vf[z]]}),\
            ]};a()'

if script is None:
    raise RuntimeError(
        "No <script> containing 'com_c_vf' was found; the page layout may have changed."
    )

# NOTE(review): this evaluates JavaScript downloaded from a remote site.
# Only run it against a source you trust.
data = js2py.eval_js(script)
    
In [4]:
    
# Lookup table for the per-destination codes found in `so_vf`; it is applied
# when filling in visa-free entries of the tidy table.
# NOTE(review): the numeric values are presumably the visa-free stay length in
# days, with 'VF' / '-' meaning no stated limit. Confirm against the site.
so = {
    '0': 'VF',
    '1': 7,
    '2': 14,
    '3': 90,
    '4': 28,
    '5': 30,
    '6': 180,
    '7': 360,
    '8': 31,
    '9': '-',
    '12': 60,
    '13': 15,
    '14': 120,
    '15': 240,
    '16': 45,  # was '16:' (stray colon), which made code '16' raise KeyError
    '17': 21,
    '18': 42,
}
    
In [5]:
    
# Country-code table, indexed by ISO2. Everything is read as strings, and
# missing cells are filled with the literal 'NA' before indexing.
url = (
    'https://gist.githubusercontent.com/ilyankou/b2580c632bdea4af2309dcaa69860013/raw/420fb417bcd17d833156efdf64ce8a1c3ceb2691/country-codes'
)
codes = (
    pd.read_csv(url, dtype=str)
    .fillna('NA')
    .set_index('ISO2')
)
def fix_iso2(x):
    """Return the corrected ISO2 code for `x`.

    'UK' becomes 'GB' and 'RK' becomes 'XK'; any other value is returned
    unchanged.
    """
    corrections = {'UK': 'GB', 'RK': 'XK'}
    return corrections.get(x, x)
    
In [6]:
    
# Index of every ordered (Passport, Destination) pair of distinct ISO2 codes.
multiindex = pd.MultiIndex.from_tuples(
    list(permutations(codes.index, 2)), names=['Passport', 'Destination']
)

# By default a visa is required, so every pair starts out as 'VR'.
tidy_iso2 = pd.DataFrame(index=multiindex)
tidy_iso2['Code'] = 'VR'

# Layout of `data`, as returned by the JavaScript snippet evaluated earlier:
#   data[0] -> com_c_vf   (visa free)
#   data[1] -> com_c_voa  (visa on arrival)
#   data[2] -> com_c_eta  (eTA)
#   data[3] -> so_vf      (one code per visa-free destination, decoded via `so`)
# Every list is built from Object.keys(com_c_vf), so entry j refers to the
# same passport in all four lists.
for i in range(3):
    for j in range(len(data[i])):
        passport = fix_iso2(data[i][j][0])  # correct UK and Kosovo codes
        countries = data[i][j][1].split(',')

        # For visa free, the stored value comes from the parallel so_vf list
        if i == 0:
            vf2days = data[3][j][1].split(',')

        for k in range(len(countries)):
            country = countries[k]
            if country == '':
                continue

            country = fix_iso2(country)  # correct UK and Kosovo codes

            # Fix: the labels for i == 1 and i == 2 were previously swapped,
            # tagging visa-on-arrival entries as 'ETA' and eTA entries as 'VOA'.
            if i == 0:
                code = so[vf2days[k]]
            elif i == 1:
                code = 'VOA'
            else:
                code = 'ETA'

            tidy_iso2.loc[(passport, country), 'Code'] = code
    
In [7]:
    
# Tidy (long) table: one row per (Passport, Destination) pair.
tidy_iso2.to_csv('passport-index-tidy-iso2.csv')

# Matrix (wide) table: passports as rows, destinations as columns.
# Cells with no entry are written as -1.
(
    tidy_iso2
    .reset_index()
    .pivot(index='Passport', columns='Destination', values='Code')
    .fillna(-1)
    .to_csv('passport-index-matrix-iso2.csv')
)
    
In [8]:
    
# Same data as tidy_iso2, with both ISO2 columns translated to ISO3 through
# the `codes` lookup table.
tidy_iso3 = (
    tidy_iso2.copy(deep=True)
    .reset_index()
    .assign(
        Passport=lambda frame: frame['Passport'].apply(lambda iso2: codes.loc[iso2]['ISO3']),
        Destination=lambda frame: frame['Destination'].apply(lambda iso2: codes.loc[iso2]['ISO3']),
    )
)

# Tidy (long) export, without the positional index.
tidy_iso3.to_csv('passport-index-tidy-iso3.csv', index=False)

# Matrix (wide) export; cells with no entry are written as -1.
(
    tidy_iso3
    .reset_index()
    .pivot(index='Passport', columns='Destination', values='Code')
    .fillna(-1)
    .to_csv('passport-index-matrix-iso3.csv')
)
    
In [9]:
    
# Same data as tidy_iso2, with both ISO2 columns translated to the 'Country'
# column of the `codes` lookup table.
tidy_names = (
    tidy_iso2.copy(deep=True)
    .reset_index()
    .assign(
        Passport=lambda frame: frame['Passport'].apply(lambda iso2: codes.loc[iso2]['Country']),
        Destination=lambda frame: frame['Destination'].apply(lambda iso2: codes.loc[iso2]['Country']),
    )
)

# Tidy (long) export, without the positional index.
tidy_names.to_csv('passport-index-tidy.csv', index=False)

# Matrix (wide) export; cells with no entry are written as -1.
(
    tidy_names
    .reset_index()
    .pivot(index='Passport', columns='Destination', values='Code')
    .fillna(-1)
    .to_csv('passport-index-matrix.csv')
)
    
In [ ]:
    
    
In [ ]: